In this script we assess whether perturbation indicators are associated with any of the technical covariates.

First we read the results generated by perturbation_propensity.R:

results_file <- paste0(
  .get_config_path("LOCAL_SCEPTRE2_DATA_DIR"),
  "results/perturbation_propensity_analysis/results.rds"
)
results <- readRDS(results_file)
results
## # A tibble: 4,524 × 8
##    paper    dataset    ntc           covariate estimate std_error  zvalue pvalue
##    <chr>    <chr>      <chr>         <chr>        <dbl>     <dbl>   <dbl>  <dbl>
##  1 frangieh co_culture ONE-NON-GENE… n_nonzero -1.60e-3 0.00116   -1.38   0.169 
##  2 frangieh co_culture ONE-NON-GENE… n_umis     2.48e-5 0.0000950  0.260  0.795 
##  3 frangieh co_culture ONE-NON-GENE… cluster_x  2.82e-1 0.239      1.18   0.238 
##  4 frangieh co_culture ONE-NON-GENE… cluster_y  3.62e-1 0.205      1.76   0.0777
##  5 frangieh co_culture ONE-NON-GENE… s_score   -1.02e-1 1.95      -0.0523 0.958 
##  6 frangieh co_culture ONE-NON-GENE… g2m_score -2.56e+0 1.47      -1.74   0.0812
##  7 frangieh co_culture ONE-NON-GENE… phaseG2M   9.58e-1 0.781      1.23   0.220 
##  8 frangieh co_culture ONE-NON-GENE… phaseS     7.95e-2 0.685      0.116  0.908 
##  9 frangieh co_culture ONE-NON-GENE… batchB3    6.37e-1 0.631      1.01   0.312 
## 10 frangieh co_culture ONE-NON-GENE… batchC3   -3.76e-1 0.768     -0.489  0.625 
## # … with 4,514 more rows

Next, for each dataset, we plot the \(p\)-values for each covariate across NTCs:

datasets <- results |>
  arrange(paper) |>
  pull(dataset) |>
  unique()
for (dataset in datasets) {
  results_dataset <- results |> filter(dataset == !!dataset)
  paper <- results_dataset |>
    pull(paper) |>
    unique()
  p_hist <- results_dataset |>
    ggplot(aes(x = pvalue)) +
    geom_histogram(colour = "black", binwidth = 0.1, boundary = 0) +
    facet_wrap(~covariate, scales = "free") +
    scale_x_continuous(limits = c(0, 1)) +
    labs(title = sprintf("%s %s", paper, dataset)) +
    theme_bw()
  plot(p_hist)

  p_qq <- results_dataset |>
    ggplot(aes(y = pvalue)) +
    stat_qq_points() +
    stat_qq_band() +
    geom_abline() +
    facet_wrap(~covariate, scales = "free") +
    scale_x_continuous(trans = revlog_trans(base = 10)) +
    scale_y_continuous(trans = revlog_trans(base = 10)) +
    labs(title = sprintf("%s %s", paper, dataset)) +
    theme_bw()
  plot(p_qq)
}